clear
set more off

*---------------------------------------------------------------------*
* STEP 1: PREPARE 1936 SURVEY FOR APPENDING WITH CENSUS
*---------------------------------------------------------------------*
* Loads the pooled 1936 survey, keeps Black and white fathers, renames
* and derives variables so they line up with the Census extracts built
* in Steps 2-3, and stores the result in the tempfile named 1936survey.
* NOTE(review): no age restriction is applied to the survey here, while
* the Census fathers are limited to ages 30-50 -- confirm the raw survey
* file is already restricted accordingly.

use "$Survey1936/RawData/pooled_data_w_occ_codes.dta", clear

    * Hold on to just the raw fields referenced below
    keep husband_age wife_exists region deep_south south occ1_harm white ///
         own_home totkids urban_samp own_radio own_any_acres finish_6th  ///
         no_hs_att some_hs hs_grad some_coll coll_ba V840 tot_fam_inc

    * Fathers only: respondents with at least one child
        rename totkids kidsperdad
        keep if kidsperdad>0 //note: no missings

    * Age of the husband becomes the common age variable
        tab husband_age, m
        rename husband_age age

    * Race coded 1 = white, 2 = not white (from the white dummy);
    * respondents with white missing get no race code and are dropped
        tab white, m
        gen race = cond(white==1, 1, cond(white==0, 2, .))
        tab race white, m

        keep if inlist(race, 1, 2)
        drop white

    * Region: one dummy per category of the survey region variable
        rename region region_merge
        tab region_merge, gen(reg)
        tab south
        rename south south_merge

    * Married = a wife is recorded (missing wife_exists counts as 0)
        gen married = wife_exists==1
        tab married, m

    * Education

        //finished 8th grade; left missing when V840 is 43 or higher
        gen grade8_grad = cond(V840<43, V840>=8, .)
        tab grade8_grad, m
        tab V840, m

        tab hs_grad, m
        tab coll_ba, m

    * Ownership
        // 1. Straight copy of the survey own-home variable
        //    (not carried into the final keep list below)
        clonevar own_home1 = own_home
        label var own_home1 "Comparison b/w own_home var (1936) and ownershp (Censuses)"

        // 2. Blended proxy: own_home when occ1_harm==21,
        //    own_any_acres when occ1_harm==81, missing otherwise
        gen own_home2 = cond(occ1_harm==21, own_home, ///
                        cond(occ1_harm==81, own_any_acres, .))
        tab own_home2, m
        label var own_home2 "Blended own home measure (1936) and ownershp (Censuses)"

    * Occupations (already coarsened to ~30 occupations)
        tab occ1_harm
        rename occ1_harm occ1950ej

    * No survey weights available, so every respondent gets weight 1
        gen weight = 1

    * Variable labels
        label var occ1950ej "Coarsened occupation"
        label var south_merge "Southern region"
        label var race "Race"

    * Flag marking rows that come from the 1936 survey
        gen survey1936 = 1

    * Final variable set carried into the appended file
        keep weight age occ1950ej kidsperdad own_home2 own_radio  ///
             grade8_grad hs_grad coll_ba urban_samp south_merge   ///
             reg1-reg4 deep_south race married survey1936 tot_fam_inc

    tempfile 1936survey
    save `1936survey'

*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

/*Note: Will count # of children <18 manually in the Censuses, as the 
        Census-provided variable counts children of ANY AGE.*/

*---------------------------------------------------------------------*
* STEP 2: PREPARE 1930 CENSUS (1%) FOR APPENDING WITH 1936 SURVEY
*---------------------------------------------------------------------*
* Builds a father-level file from the 1930 Census extract: counts each
* father's co-resident children under 18, keeps Black and white fathers
* aged 30-50, derives the comparison variables, and saves the result in
* the tempfile named census1930.

    use "$CensusData/input/Census1930_1pct_raw_ownershp.dta", clear

    tempfile fulldata
    save `fulldata'

    * Children under 18 that point to a father (poploc) in the household
        keep if age<18
        keep serial poploc
        drop if poploc==0 | poploc==. //no father in the house

    * One row per father, carrying his number of kids under 18
        bysort serial poploc: gen kidsperdad = _N
        label var kidsperdad "Number of kids living in Census father's household"
        by serial poploc: keep if _n==1 //a father may have several children
        rename poploc pernum //becomes the person-level merge key

        tempfile children
        save `children'

    * Attach the counts to the full file and keep matched persons (fathers)
        use `fulldata', clear
        merge 1:1 serial pernum using `children', keep(match) nogenerate

    * Fathers aged 30 to 50
        keep if inrange(age, 30, 50)

    * Black and white fathers only
        keep if inlist(race, 1, 2)

    * Person weight
        rename perwt weight

    * Collapse the Census region codes to four regions
        gen region_merge = cond(inlist(region,11,12),  1, /// Northeast
                           cond(inlist(region,21,22),  2, /// Midwest
                           cond(inrange(region,31,33), 3, /// South
                           cond(inlist(region,41,42),  4, .)))) // West
        tab region, m
        tab region_merge, m
        tab region_merge, gen(reg)

        label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
        label values region_merge region_l
        tab region_merge, m

        gen south_merge = region_merge==3

        gen deep_south = inlist(statefip,1,13,22,28,45) //note: no missings
        tab deep_south, m

    * Marital status: marst codes 1 and 2 count as married
        gen married = inlist(marst,1,2)
        tab married, m

    * Education---not available in 1930

    * Home ownership (ownershp==1); own_home2 is an exact copy so the
    * Census rows populate both ownership measures
        gen own_home1 = ownershp==1
        tab own_home1, m
        label var own_home1 "Comparison b/w own_home var (1936) and ownershp (Censuses)"

        clonevar own_home2 = own_home1
        label var own_home2 "Comparison b/w own_any_acres var (1936) and ownershp (Censuses)"

    * Radio ownership: radio30==2 -> 1, radio30==1 -> 0, otherwise missing
        gen own_radio = cond(radio30==2, 1, cond(radio30==1, 0, .))
        tab own_radio, m
        tab radio30, m

    * Occupation
        sort occ1950
        //e.g., "keeps house, student, retired, w/o occupation, invalid, inmate"
        replace occ1950=. if inrange(occ1950,980,995)

        * Codes 200-290 with classwkr==1 are shifted by 1000 so the
        * crosswalk can treat them separately (self-employment split)
        replace occ1950=occ1950+1000 if inrange(occ1950,200,290) & classwkr==1

        * Crosswalk Census occupations to coarsened ANES occupations;
        * every Census row must find a crosswalk entry
        merge m:1 occ1950 using "$Crosswalks/Crosswalk_1950Census_toANES.dta"
        drop if _merge==2
        assert _merge==3
        drop _merge

        * Unmapped rows must have missing occ1950
        * (note: no occ1950==997 in 1930, unlike the 1940 step)
        assert occ1950==. if occ1950ej==.
        drop if occ1950ej==.
        tab occ1950 if occ1950ej==99, m nol

    * Income---not available in 1930

    * Flag marking rows that come from the 1930 Census
        gen census1930 = 1

        keep weight age occ1950ej kidsperdad own_home1 own_home2  ///
             own_radio south_merge reg1-reg4 race married region  ///
             deep_south census1930

    tempfile census1930
    save `census1930'

*-----------------------------------------------------------------------------*
*-----------------------------------------------------------------------------*

*---------------------------------------------------------------------*
* STEP 3: PREPARE 1940 CENSUS (1%) FOR APPENDING WITH 1936 SURVEY
*---------------------------------------------------------------------*
* Same construction as the 1930 step, applied to the 1940 extract:
* father-level rows with a count of co-resident kids under 18, limited
* to Black and white fathers aged 30-50. Adds education (available in
* 1940), has no radio variable, and saves to the tempfile census1940.

    use "$CensusData/input/Census1940_1pct_raw_ownershp.dta", clear

    tempfile fulldata
    save `fulldata'

    * Children under 18 that point to a father (poploc) in the household
        keep if age<18
        keep serial poploc
        drop if poploc==0 | poploc==. //no father in the house

    * One row per father, carrying his number of kids under 18
        bysort serial poploc: gen kidsperdad = _N
        label var kidsperdad "Number of kids living in Census father's household"
        by serial poploc: keep if _n==1 //a father may have several children
        rename poploc pernum //becomes the person-level merge key

        tempfile children
        save `children'

    * Attach the counts to the full file and keep matched persons (fathers)
        use `fulldata', clear
        merge 1:1 serial pernum using `children', keep(match) nogenerate

    * Fathers aged 30 to 50
        keep if inrange(age, 30, 50)

    * Black and white fathers only
        keep if inlist(race, 1, 2)

    * Person weight
        rename perwt weight

    * Collapse the Census region codes to four regions
        gen region_merge = cond(inlist(region,11,12),  1, /// Northeast
                           cond(inlist(region,21,22),  2, /// Midwest
                           cond(inrange(region,31,33), 3, /// South
                           cond(inlist(region,41,42),  4, .)))) // West
        tab region, m
        tab region_merge, m
        tab region_merge, gen(reg)

        label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
        label values region_merge region_l
        tab region_merge, m

        gen south_merge = region_merge==3

        gen deep_south = inlist(statefip,1,13,22,28,45) //note: no missings
        tab deep_south, m

    * Education thresholds on educd (note: no missings)
        gen grade8_grad = educd>=26  //8th grade grad
        tab grade8_grad, m

        gen hs_grad = educd>=60      //hs grad
        tab hs_grad, m

        gen coll_ba = educd>=100     //coll grad
        tab coll_ba, m

        tab educd, m

    * Marital status: marst codes 1 and 2 count as married
        gen married = inlist(marst,1,2)
        tab married, m

    * Home ownership (ownershp==1); own_home2 is an exact copy so the
    * Census rows populate both ownership measures
        gen own_home1 = ownershp==1
        tab own_home1, m
        label var own_home1 "Comparison b/w own_home var (1936) and ownershp (Censuses)"

        clonevar own_home2 = own_home1
        label var own_home2 "Comparison b/w own_any_acres var (1936) and ownershp (Censuses)"

    * Owns radio---not available in 1940

    * Occupation
        sort occ1950
        //E.g., "keeps house, student, retired, w/o occupation, invalid, inmate"
        replace occ1950=. if inrange(occ1950,980,995)

        * Codes 200-290 with classwkr==1 are shifted by 1000 so the
        * crosswalk can treat them separately (self-employment split)
        replace occ1950=occ1950+1000 if inrange(occ1950,200,290) & classwkr==1

        * Crosswalk Census occupations to coarsened ANES occupations;
        * every Census row must find a crosswalk entry
        merge m:1 occ1950 using "$Crosswalks/Crosswalk_1950Census_toANES.dta"
        drop if _merge==2
        assert _merge==3
        drop _merge

        * Unmapped rows must be either missing or code 997
        assert occ1950==. | occ1950==997 if occ1950ej==.
        drop if occ1950ej==.
        tab occ1950 if occ1950ej==99, m nol

    * Flag marking rows that come from the 1940 Census
        gen census1940 = 1

        keep weight age occ1950ej kidsperdad own_home1 own_home2      ///
             grade8_grad hs_grad coll_ba south_merge reg1-reg4 race   ///
             married region deep_south census1940

    tempfile census1940
    save `census1940'

*---------------------------------------------------------------------------------------------------*
*---------------------------------------------------------------------------------------------------*

*------------------------------------*
* STEP 4: APPEND DATA SOURCES
*------------------------------------*
* Stacks the 1936 survey and the two Census father files, tags each row
* with its source, restricts to the five occupation x race x South cells
* reported in the table, and numbers those cells in the variable cell.

    use `1936survey', clear
    append using `census1930'
    append using `census1940'
        
    * Variable to indicate data source
     gen data_source =.
            replace data_source =1 if survey1936==1
            replace data_source =2 if census1930==1
            replace data_source =3 if census1940==1

            label define dsource 1 "1936_survey" 2 "1930_Census" 3 "1940_Census"
            label values data_source dsource 

     tab data_source, m

    drop survey1936 census1930 census1940
        
    * Total number of fathers in each data source, counted BEFORE the
    * occupation/cell restrictions below; this is the denominator of
    * share_census. It is an unweighted row count.
    bysort data_source: gen tot_dads=_N
    
    /* Restrict to fathers (black and white, aged 30-50) 
       that are farmers and self-employed businessmen */
        keep if occ1950ej==21 | occ1950ej==81

    * Restrict to most populous cells
        keep if (occ1950ej==21 & race==1) | (occ1950ej==81 & race==1) | (occ1950ej==81 & race==2 & south_merge==1)
        /* 
             Cells that survive, listed in the order in which the
             egen group() call below numbers them (sorted by occ1950ej,
             then race, then south_merge; assumes all five are non-empty):
             (1) Self-employed, white, nonsouth
             (2) Self-employed, white, south
             (3) Farmer, white, nonsouth
             (4) Farmer, white, south
             (5) Farmer, Black, south
        */

    * Only compare a few variables for farmers across data sources:
    * blank them out for non-farmer rows of the 1936 survey
        local vars "own_radio grade8_grad hs_grad coll_ba"

        foreach v of local vars {
            replace `v' =. if occ1950ej!=81 & data_source==1 
        } 

    * Set up some labels for the table
        label var deep_south "Lives in deep South"
        label var age "Age"
        label var married "Married"
        label var kidsperdad "# of kids <18 living in dad's household"
        label var own_home1 "Owns his home"
        label var own_home2 "Owns home/owns any acres (proxy)"
        label var own_radio "Owns radio"
        label var grade8_grad "8th grade graduate"
        label var hs_grad "HS graduate"
        label var coll_ba "College graduate"
        label var reg1 "Lives in Northeast"
        label var reg2 "Lives in Midwest"
        label var reg3 "Lives in the South"
        label var reg4 "Lives in the West"

        * Rename only the four region dummies. A wildcard pattern on the
        * reg prefix would also catch the raw Census variable region and
        * turn it into regionion, so the names are listed explicitly.
        rename (reg1 reg2 reg3 reg4) (region1 region2 region3 region4)
        sort occ1950ej race south_merge data_source

    * Generate share of occ x race x south cell in each data source
      /* But will only put Census shares in the table */
        gen share_census=.
        by occ1950ej race south_merge data_source: replace share_census=_N/tot_dads
        replace share_census=. if data_source==1
    
    * Make cell ids (1-5, see the cell list above)
        egen cell = group(occ1950ej race south_merge)

*---------------------------------------------------------------------------------------------------*
*---------------------------------------------------------------------------------------------------*
   
*----------------------------------------*
* STEP 5: MAKE TABLE OF SUMMARY STATS
*----------------------------------------*
* Writes a LaTeX tabularx body with one panel per cell (1-5). Each panel
* reports weighted means by data source in the column order 1930 Census,
* 1936 Survey, 1940 Census, followed by unweighted observation counts.
* A dash is written when a source has no non-missing values.

    * Release the handle if an earlier, interrupted run left it open;
    * otherwise file open stops because the handle name is still in use.
    capture file close weirdos
    file open weirdos using "$Mydirectory2/appendix_e/1936_1930_40census_comp.tex", write replace

    file write weirdos "\begin{tabularx}{\hsize}{@{\hskip\tabcolsep\extracolsep\fill} l c c c}" _n
    file write weirdos "\toprule" _n
    file write weirdos "\addlinespace[0.75ex]" _n    
    file write weirdos "& 1930 Census & 1936 Survey & 1940 Census \\" _n
    file write weirdos "\addlinespace[0.25ex]" _n    
    file write weirdos "\midrule" _n

    * Rows of every panel, in display order (same list for all cells)
    local varlist "age grade8_grad hs_grad region1 region2 region3 region4 married kidsperdad own_home2 tot_fam_inc share_census" 

    foreach cell in 1 2 3 4 5 {

        * Panel title; numbering follows egen group(occ1950ej race south_merge)
        if `cell'==1 local cellname "Self-employed $\times$ white $\times$ non-South"
        if `cell'==2 local cellname "Self-employed $\times$ white $\times$ South"
        if `cell'==3 local cellname "Farmer $\times$ white $\times$ non-South"
        if `cell'==4 local cellname "Farmer $\times$ white $\times$ South"
        if `cell'==5 local cellname "Farmer $\times$ Black $\times$ South"

        file write weirdos "\addlinespace[1.25ex]" _n
        file write weirdos "\emph{`cellname'} & & & \\" _n
            
            foreach vv of local varlist {

            * Row label for this variable
            if "`vv'"=="age" local label1 "Age"
            if "`vv'"=="grade8_grad" local label1 "Completed 8th grade"
            if "`vv'"=="hs_grad" local label1 "Completed HS"
            if "`vv'"=="region1" local label1 "Lives in Northeast"
            if "`vv'"=="region2" local label1 "Lives in Midwest"
            if "`vv'"=="region3" local label1 "Lives in South"
            if "`vv'"=="region4" local label1 "Lives in West"
            if "`vv'"=="married" local label1 "Married"
            if "`vv'"=="kidsperdad" local label1 "\# of kids $<$18 in household"
            if "`vv'"=="own_home2" local label1 "Owns his home"
            if "`vv'"=="tot_fam_inc" local label1 "Family income, 1936\textdollar"
            if "`vv'"=="share_census" local label1 "Share in Census"

            * Skips
            *   self-employed cells: no education rows
            *   non-South cells: no South row
            *   South cells: no region rows at all
            if inlist(`cell',1,2) & ("`vv'"=="grade8_grad" | "`vv'"=="hs_grad") continue
            if inlist(`cell',1,3) & ("`vv'"=="region3") continue
            if inlist(`cell',2,4,5) & ("`vv'"=="region1" | "`vv'"=="region2" | "`vv'"=="region3" | "`vv'"=="region4") continue

                file write weirdos "\quad `label1'" 
                 * Column order: 1930 Census (2), 1936 Survey (1), 1940 Census (3)
                 foreach s in 2 1 3 {

                    summ `vv' [aw=weight] if cell==`cell' & data_source==`s'
                    
                        if `r(N)'!=0 {
                            local share1 = `r(mean)'

                            * Income is shown without decimals, everything else with two
                            if "`vv'" != "tot_fam_inc" local share2: display %-09.2fc `share1'
                            if "`vv'" == "tot_fam_inc" local share2: display %-09.0fc `share1'
                        }
                        else {
                            * No usable observations for this source
                            local share2 "---"
                        }

                    file write weirdos "& `share2'" 
                    if `s'==3  file write weirdos "\\" _n  
                } 
 
            } 

        file write weirdos "\addlinespace[1ex]" _n 
        file write weirdos "\midrule" _n    
        file write weirdos "\addlinespace[0.5ex]" _n
        file write weirdos "Observations"

        * Unweighted row counts per source, same column order as above
        foreach t in 2 1 3 {
            count if cell==`cell' & data_source==`t'
            local count1 = `r(N)'
            local count2: display %-09.0fc `count1'

            file write weirdos "& `count2'"
        }


        file write weirdos " \\" _n //end "Observations" row
        file write weirdos "\addlinespace[0.05ex]" _n
        if `cell'!=5 file write weirdos "\midrule" _n   
        if `cell'==5 file write weirdos "\bottomrule" _n       
    
    }

    file write weirdos "\end{tabularx}" _n
    file write weirdos "\thispagestyle{empty}"

    file close weirdos